In [7]:
import pandas as pd
import numpy as np
import os
import sys
# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files we will see it later.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
# to play the audio files
from IPython.display import Audio
import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
import warnings
if not sys.warnoptions:
warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [8]:
import zipfile
with zipfile.ZipFile('archive baby.zip', 'r') as zip_ref:
zip_ref.extractall('./kaggle/input/surrey-audiovisual-expressed-emotion-savee')
In [9]:
import zipfile
with zipfile.ZipFile('archive corner.zip', 'r') as zip_ref:
zip_ref.extractall('./kaggle/input/toronto-emotional-speech-set-te')
In [10]:
import zipfile
with zipfile.ZipFile('archive flow.zip', 'r') as zip_ref:
zip_ref.extractall('./kaggle/input/ravdess-emotional-speech-audio')
In [11]:
import zipfile
with zipfile.ZipFile('archive stay.zip', 'r') as zip_ref:
zip_ref.extractall('./kaggle/input/cremad')
In [12]:
Ravdess = "./kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/"
Crema = "./kaggle/input/cremad/AudioWAV/"
Tess = "./kaggle/input/toronto-emotional-speech-set-tes/TESS Toronto emotional speech set data/TESS Toronto emotional speech set data/"
Savee = "./kaggle/input/surrey-audiovisual-expressed-emotion-savee/ALL/"
In [13]:
ravdess_directory_list = os.listdir(Ravdess)
file_emotion = []
file_path = []
for dir in ravdess_directory_list:
# as their are 20 different actors in our previous directory we need to extract files for each actor.
actor = os.listdir(Ravdess + dir)
for file in actor:
part = file.split('.')[0]
part = part.split('-')
# third part in each file represents the emotion associated to that file.
file_emotion.append(int(part[2]))
file_path.append(Ravdess + dir + '/' + file)
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Ravdess_df = pd.concat([emotion_df, path_df], axis=1)
# changing integers to actual emotions.
Ravdess_df.Emotions.replace({1:'neutral', 2:'calm', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'}, inplace=True)
Ravdess_df.head()
Out[13]:
| Emotions | Path | |
|---|---|---|
| 0 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 1 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 2 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 3 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 4 | calm | ./kaggle/input/ravdess-emotional-speech-audio/... |
In [14]:
crema_directory_list = os.listdir(Crema)
file_emotion = []
file_path = []
for file in crema_directory_list:
# storing file paths
file_path.append(Crema + file)
# storing file emotions
part=file.split('_')
if part[2] == 'SAD':
file_emotion.append('sad')
elif part[2] == 'ANG':
file_emotion.append('angry')
elif part[2] == 'DIS':
file_emotion.append('disgust')
elif part[2] == 'FEA':
file_emotion.append('fear')
elif part[2] == 'HAP':
file_emotion.append('happy')
elif part[2] == 'NEU':
file_emotion.append('neutral')
else:
file_emotion.append('Unknown')
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
Crema_df.head()
Out[14]:
| Emotions | Path | |
|---|---|---|
| 0 | angry | ./kaggle/input/cremad/AudioWAV/1001_DFA_ANG_XX... |
| 1 | disgust | ./kaggle/input/cremad/AudioWAV/1001_DFA_DIS_XX... |
| 2 | fear | ./kaggle/input/cremad/AudioWAV/1001_DFA_FEA_XX... |
| 3 | happy | ./kaggle/input/cremad/AudioWAV/1001_DFA_HAP_XX... |
| 4 | neutral | ./kaggle/input/cremad/AudioWAV/1001_DFA_NEU_XX... |
In [15]:
tess_directory_list = os.listdir(Tess)
file_emotion = []
file_path = []
for dir in tess_directory_list:
directories = os.listdir(Tess + dir)
for file in directories:
part = file.split('.')[0]
part = part.split('_')[2]
if part=='ps':
file_emotion.append('surprise')
else:
file_emotion.append(part)
file_path.append(Tess + dir + '/' + file)
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Tess_df = pd.concat([emotion_df, path_df], axis=1)
Tess_df.head()
Out[15]:
| Emotions | Path | |
|---|---|---|
| 0 | angry | ./kaggle/input/toronto-emotional-speech-set-te... |
| 1 | angry | ./kaggle/input/toronto-emotional-speech-set-te... |
| 2 | angry | ./kaggle/input/toronto-emotional-speech-set-te... |
| 3 | angry | ./kaggle/input/toronto-emotional-speech-set-te... |
| 4 | angry | ./kaggle/input/toronto-emotional-speech-set-te... |
In [16]:
savee_directory_list = os.listdir(Savee)
file_emotion = []
file_path = []
for file in savee_directory_list:
file_path.append(Savee + file)
part = file.split('_')[1]
ele = part[:-6]
if ele=='a':
file_emotion.append('angry')
elif ele=='d':
file_emotion.append('disgust')
elif ele=='f':
file_emotion.append('fear')
elif ele=='h':
file_emotion.append('happy')
elif ele=='n':
file_emotion.append('neutral')
elif ele=='sa':
file_emotion.append('sad')
else:
file_emotion.append('surprise')
# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['Path'])
Savee_df = pd.concat([emotion_df, path_df], axis=1)
Savee_df.head()
Out[16]:
| Emotions | Path | |
|---|---|---|
| 0 | angry | ./kaggle/input/surrey-audiovisual-expressed-em... |
| 1 | angry | ./kaggle/input/surrey-audiovisual-expressed-em... |
| 2 | angry | ./kaggle/input/surrey-audiovisual-expressed-em... |
| 3 | angry | ./kaggle/input/surrey-audiovisual-expressed-em... |
| 4 | angry | ./kaggle/input/surrey-audiovisual-expressed-em... |
In [17]:
data_path = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis = 0)
data_path.to_csv("data_path.csv",index=False)
data_path.head()
Out[17]:
| Emotions | Path | |
|---|---|---|
| 0 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 1 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 2 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 3 | neutral | ./kaggle/input/ravdess-emotional-speech-audio/... |
| 4 | calm | ./kaggle/input/ravdess-emotional-speech-audio/... |
In [18]:
plt.title('Count of Emotions', size=16)
sns.countplot(data_path.Emotions)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()
In [19]:
def create_waveplot(data, sr, e):
plt.figure(figsize=(10, 3))
plt.title('Waveplot for audio with {} emotion'.format(e), size=15)
librosa.display.waveplot(data, sr=sr)
plt.show()
def create_spectrogram(data, sr, e):
# stft function converts the data into short term fourier transform
X = librosa.stft(data)
Xdb = librosa.amplitude_to_db(abs(X))
plt.figure(figsize=(12, 3))
plt.title('Spectrogram for audio with {} emotion'.format(e), size=15)
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
#librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()
In [20]:
# emotion='fear'
# path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
# data, sampling_rate = librosa.load(path)
# create_waveplot(data, sampling_rate, emotion)
# create_spectrogram(data, sampling_rate, emotion)
# Audio(path)
import librosa
import librosa.display
import matplotlib.pyplot as plt
def create_waveplot(data, sr, e):
plt.figure(figsize=(10, 3))
plt.title('Waveplot for audio with {} emotion'.format(e), size=15)
plt.plot(data)
plt.show()
emotion='fear'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
Out[20]:
In [21]:
emotion='angry'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
Out[21]:
In [22]:
emotion='sad'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
Out[22]:
In [23]:
emotion='happy'
path = np.array(data_path.Path[data_path.Emotions==emotion])[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
Out[23]:
In [24]:
def noise(data):
noise_amp = 0.035*np.random.uniform()*np.amax(data)
data = data + noise_amp*np.random.normal(size=data.shape[0])
return data
def stretch(data, rate=0.8):
return librosa.effects.time_stretch(data, rate)
def shift(data):
shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
return np.roll(data, shift_range)
def pitch(data, sampling_rate, pitch_factor=0.7):
return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)
# taking any example and checking for techniques.
path = np.array(data_path.Path)[1]
data, sample_rate = librosa.load(path)
In [25]:
# plt.figure(figsize=(14,4))
# librosa.display.waveplot(y=data, sr=sample_rate)
# Audio(path)
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
plt.figure(figsize=(14, 4))
plt.plot(data) # Assuming data is your audio signal
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Audio waveform')
plt.show()
Audio(path) # Assuming path is the path to your audio file
Out[25]:
In [26]:
# x = noise(data)
# plt.figure(figsize=(14,4))
# librosa.display.waveplot(y=x, sr=sample_rate)
# Audio(x, rate=sample_rate)
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
import numpy as np
def noise(data, factor=0.005):
noise = np.random.normal(0, 1, len(data))
return data + factor * noise
x = noise(data)
plt.figure(figsize=(14, 4))
plt.plot(x) # Assuming x is your noisy audio signal
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Noisy Audio waveform')
plt.show()
Audio(x, rate=sample_rate) # Assuming sample_rate is the sample rate of your audio
Out[26]:
In [27]:
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
def stretch(data, rate=0.8):
return librosa.effects.time_stretch(data, rate=rate) # Specify rate as a keyword argument
x = stretch(data)
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Stretched Audio waveform')
plt.show()
Audio(x, rate=sample_rate)
Out[27]:
In [28]:
# x = shift(data)
# plt.figure(figsize=(14,4))
# librosa.display.waveplot(y=x, sr=sample_rate)
# Audio(x, rate=sample_rate)
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
def shift(data, shift_amount=500):
return np.roll(data, shift_amount)
x = shift(data)
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Shifted Audio waveform')
plt.show()
Audio(x, rate=sample_rate)
Out[28]:
In [29]:
import librosa
import matplotlib.pyplot as plt
from IPython.display import Audio
def pitch(data, sampling_rate, pitch_factor=0.7):
return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=int(pitch_factor * 100))
x = pitch(data, sample_rate)
plt.figure(figsize=(14, 4))
plt.plot(x)
plt.xlabel('Time (samples)')
plt.ylabel('Amplitude')
plt.title('Pitch-shifted Audio waveform')
plt.show()
Audio(x, rate=sample_rate)
Out[29]:
In [30]:
def extract_features(data):
# ZCR
result = np.array([])
zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
result=np.hstack((result, zcr)) # stacking horizontally
# Chroma_stft
stft = np.abs(librosa.stft(data))
chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
result = np.hstack((result, chroma_stft)) # stacking horizontally
# MFCC
mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
result = np.hstack((result, mfcc)) # stacking horizontally
# Root Mean Square Value
rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
result = np.hstack((result, rms)) # stacking horizontally
# MelSpectogram
mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
result = np.hstack((result, mel)) # stacking horizontally
return result
def get_features(path):
# duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)
# without augmentation
res1 = extract_features(data)
result = np.array(res1)
# data with noise
noise_data = noise(data)
res2 = extract_features(noise_data)
result = np.vstack((result, res2)) # stacking vertically
# data with stretching and pitching
new_data = stretch(data)
data_stretch_pitch = pitch(new_data, sample_rate)
res3 = extract_features(data_stretch_pitch)
result = np.vstack((result, res3)) # stacking vertically
return result
In [46]:
data_path = pd.concat([Ravdess_df, Crema_df, Tess_df, Savee_df], axis = 0)
data_path.to_csv("data_path.csv",index=False)
data_path[600:675]
total_rows = len(data_path)
print("Total number of rows in data_path.csv:", total_rows)
Total number of rows in data_path.csv: 12162
In [63]:
X, Y = [], []
for path, emotion in zip(data_path.Path, data_path.Emotions):
feature = get_features(path)
for ele in feature:
X.append(ele)
# appending emotion 3 times as we have made 3 augmentation techniques on each audio file.
Y.append(emotion)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Cell In[63], line 3 1 X, Y = [], [] 2 for path, emotion in zip(data_path.Path, data_path.Emotions): ----> 3 feature = get_features(path) 4 for ele in feature: 5 X.append(ele) Cell In[30], line 41, in get_features(path) 39 # data with stretching and pitching 40 new_data = stretch(data) ---> 41 data_stretch_pitch = pitch(new_data, sample_rate) 42 res3 = extract_features(data_stretch_pitch) 43 result = np.vstack((result, res3)) # stacking vertically Cell In[29], line 6, in pitch(data, sampling_rate, pitch_factor) 5 def pitch(data, sampling_rate, pitch_factor=0.7): ----> 6 return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=int(pitch_factor * 100)) File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\librosa\effects.py:332, in pitch_shift(y, sr, n_steps, bins_per_octave, res_type, scale, **kwargs) 328 rate = 2.0 ** (-float(n_steps) / bins_per_octave) 330 # Stretch in time, then resample 331 y_shift = core.resample( --> 332 time_stretch(y, rate=rate, **kwargs), 333 orig_sr=float(sr) / rate, 334 target_sr=sr, 335 res_type=res_type, 336 scale=scale, 337 ) 339 # Crop to the same dimension as the input 340 return util.fix_length(y_shift, size=y.shape[-1]) File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\librosa\effects.py:248, in time_stretch(y, rate, **kwargs) 245 len_stretch = int(round(y.shape[-1] / rate)) 247 # Invert the STFT --> 248 y_stretch = core.istft(stft_stretch, dtype=y.dtype, length=len_stretch, **kwargs) 250 return y_stretch File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\librosa\core\spectrum.py:589, in istft(stft_matrix, hop_length, win_length, n_fft, window, center, dtype, length, out) 586 bl_t = min(bl_s + n_columns, n_frames) 588 # invert the block and apply the window function --> 589 ytmp = ifft_window * fft.irfft(stft_matrix[..., bl_s:bl_t], n=n_fft, axis=-2) 591 # Overlap-add the istft block starting at the i'th frame 592 __overlap_add(y[..., frame * hop_length + offset :], ytmp, hop_length) File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\fft\_pocketfft.py:513, in irfft(a, n, axis, norm) 511 n = (a.shape[axis] - 1) * 2 512 inv_norm = _get_backward_norm(n, norm) --> 513 output = _raw_fft(a, n, axis, True, False, inv_norm) 514 return output File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\fft\_pocketfft.py:73, in _raw_fft(a, n, axis, is_real, is_forward, inv_norm) 71 else: 72 a = swapaxes(a, axis, -1) ---> 73 r = pfi.execute(a, is_real, is_forward, fct) 74 r = swapaxes(r, axis, -1) 75 return r KeyboardInterrupt:
In [64]:
len(X), len(Y), data_path.Path.shape
Out[64]:
(1986, 1986, (12162,))
In [65]:
Features = pd.DataFrame(X)
Features['labels'] = Y
Features.to_csv('features.csv', index=False)
Features.head()
Out[65]:
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | labels | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.321275 | 0.729664 | 0.750033 | 0.730624 | 0.735275 | 0.713529 | 0.660531 | 0.684966 | 0.733049 | 0.753972 | ... | 0.000004 | 0.000003 | 0.000002 | 0.000002 | 0.000005 | 0.000008 | 0.000007 | 0.000005 | 4.245834e-07 | neutral |
| 1 | 0.413755 | 0.884037 | 0.897497 | 0.899011 | 0.899051 | 0.881126 | 0.752190 | 0.722764 | 0.772003 | 0.818910 | ... | 0.001932 | 0.001748 | 0.001736 | 0.001750 | 0.001715 | 0.001871 | 0.001749 | 0.001632 | 1.710952e-03 | neutral |
| 2 | 0.490643 | 0.271094 | 0.255352 | 0.189282 | 0.290072 | 0.393606 | 0.335324 | 0.262065 | 0.382307 | 0.578054 | ... | 0.000055 | 0.000036 | 0.000007 | 0.000001 | 0.000009 | 0.000013 | 0.000011 | 0.000006 | 8.026328e-07 | neutral |
| 3 | 0.293566 | 0.673896 | 0.722096 | 0.723508 | 0.682302 | 0.680533 | 0.675352 | 0.628977 | 0.679179 | 0.707283 | ... | 0.000007 | 0.000007 | 0.000007 | 0.000007 | 0.000012 | 0.000010 | 0.000011 | 0.000006 | 4.254087e-07 | neutral |
| 4 | 0.414596 | 0.840906 | 0.881445 | 0.881813 | 0.871292 | 0.884976 | 0.804859 | 0.723588 | 0.751556 | 0.773561 | ... | 0.001784 | 0.001805 | 0.001766 | 0.001708 | 0.001776 | 0.001758 | 0.001801 | 0.001782 | 1.796141e-03 | neutral |
5 rows × 163 columns
In [66]:
X = Features.iloc[: ,:-1].values
Y = Features['labels'].values
In [67]:
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(Y).reshape(-1,1)).toarray()
In [68]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[68]:
((1489, 162), (1489, 8), (497, 162), (497, 8))
In [69]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[69]:
((1489, 162), (1489, 8), (497, 162), (497, 8))
In [70]:
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[70]:
((1489, 162, 1), (1489, 8), (497, 162, 1), (497, 8))
In [71]:
model=Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ conv1d_4 (Conv1D) │ (None, 162, 256) │ 1,536 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling1d_4 (MaxPooling1D) │ (None, 81, 256) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ conv1d_5 (Conv1D) │ (None, 81, 256) │ 327,936 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling1d_5 (MaxPooling1D) │ (None, 41, 256) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ conv1d_6 (Conv1D) │ (None, 41, 128) │ 163,968 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling1d_6 (MaxPooling1D) │ (None, 21, 128) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_2 (Dropout) │ (None, 21, 128) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ conv1d_7 (Conv1D) │ (None, 21, 64) │ 41,024 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ max_pooling1d_7 (MaxPooling1D) │ (None, 11, 64) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ flatten_1 (Flatten) │ (None, 704) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_2 (Dense) │ (None, 32) │ 22,560 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dropout_3 (Dropout) │ (None, 32) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_3 (Dense) │ (None, 8) │ 264 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 557,288 (2.13 MB)
Trainable params: 557,288 (2.13 MB)
Non-trainable params: 0 (0.00 B)
In [72]:
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
history=model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])
Epoch 1/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 5s 82ms/step - accuracy: 0.1356 - loss: 2.0409 - val_accuracy: 0.1771 - val_loss: 1.9933 - learning_rate: 0.0010 Epoch 2/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 62ms/step - accuracy: 0.1769 - loss: 1.9990 - val_accuracy: 0.2052 - val_loss: 1.9769 - learning_rate: 0.0010 Epoch 3/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 61ms/step - accuracy: 0.1803 - loss: 1.9844 - val_accuracy: 0.2093 - val_loss: 1.9597 - learning_rate: 0.0010 Epoch 4/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 2s 61ms/step - accuracy: 0.2117 - loss: 1.9453 - val_accuracy: 0.2394 - val_loss: 1.9477 - learning_rate: 0.0010 Epoch 5/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.2201 - loss: 1.9541 - val_accuracy: 0.2394 - val_loss: 1.9320 - learning_rate: 0.0010 Epoch 6/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2111 - loss: 1.9338 - val_accuracy: 0.2374 - val_loss: 1.8988 - learning_rate: 0.0010 Epoch 7/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.2674 - loss: 1.8923 - val_accuracy: 0.2475 - val_loss: 1.8898 - learning_rate: 0.0010 Epoch 8/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2332 - loss: 1.8871 - val_accuracy: 0.2676 - val_loss: 1.8596 - learning_rate: 0.0010 Epoch 9/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2745 - loss: 1.8655 - val_accuracy: 0.2636 - val_loss: 1.8949 - learning_rate: 0.0010 Epoch 10/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.2772 - loss: 1.8602 - val_accuracy: 0.2716 - val_loss: 1.8254 - learning_rate: 0.0010 Epoch 11/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.2686 - loss: 1.8505 - val_accuracy: 0.2777 - val_loss: 1.8288 - learning_rate: 0.0010 Epoch 12/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.2827 - loss: 1.8022 - val_accuracy: 0.2797 - val_loss: 1.8532 - learning_rate: 0.0010 Epoch 13/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.2688 - loss: 1.8065 - val_accuracy: 0.2897 - val_loss: 1.8025 - learning_rate: 0.0010 Epoch 14/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2999 - loss: 1.7864 - val_accuracy: 0.2918 - val_loss: 1.7962 - learning_rate: 0.0010 Epoch 15/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.2919 - loss: 1.7617 - val_accuracy: 0.3159 - val_loss: 1.7643 - learning_rate: 0.0010 Epoch 16/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.3024 - loss: 1.7525 - val_accuracy: 0.3219 - val_loss: 1.7750 - learning_rate: 0.0010 Epoch 17/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.2830 - loss: 1.7646 - val_accuracy: 0.3119 - val_loss: 1.7698 - learning_rate: 0.0010 Epoch 18/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.3070 - loss: 1.7253 - val_accuracy: 0.3139 - val_loss: 1.7468 - learning_rate: 4.0000e-04 Epoch 19/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.3407 - loss: 1.6729 - val_accuracy: 0.3360 - val_loss: 1.7182 - learning_rate: 4.0000e-04 Epoch 20/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.3289 - loss: 1.7103 - val_accuracy: 0.3300 - val_loss: 1.7048 - learning_rate: 4.0000e-04 Epoch 21/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.3381 - loss: 1.6660 - val_accuracy: 0.3280 - val_loss: 1.7087 - learning_rate: 4.0000e-04 Epoch 22/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.3491 - loss: 1.6526 - val_accuracy: 0.3421 - val_loss: 1.7095 - learning_rate: 4.0000e-04 Epoch 23/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 47ms/step - accuracy: 0.3470 - loss: 1.6416 - val_accuracy: 0.3421 - val_loss: 1.6968 - learning_rate: 4.0000e-04 Epoch 24/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.3654 - loss: 1.6342 - val_accuracy: 0.3461 - val_loss: 1.7091 - learning_rate: 4.0000e-04 Epoch 25/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.3865 - loss: 1.6072 - val_accuracy: 0.3783 - val_loss: 1.6763 - learning_rate: 4.0000e-04 Epoch 26/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4052 - loss: 1.5322 - val_accuracy: 0.3521 - val_loss: 1.6809 - learning_rate: 4.0000e-04 Epoch 27/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.3683 - loss: 1.5950 - val_accuracy: 0.3622 - val_loss: 1.6869 - learning_rate: 4.0000e-04 Epoch 28/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.3854 - loss: 1.5616 - val_accuracy: 0.3783 - val_loss: 1.6793 - learning_rate: 4.0000e-04 Epoch 29/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4058 - loss: 1.5451 - val_accuracy: 0.3984 - val_loss: 1.6689 - learning_rate: 4.0000e-04 Epoch 30/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4048 - loss: 1.5110 - val_accuracy: 0.3843 - val_loss: 1.6769 - learning_rate: 4.0000e-04 Epoch 31/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4175 - loss: 1.5559 - val_accuracy: 0.3843 - val_loss: 1.6726 - learning_rate: 4.0000e-04 Epoch 32/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4470 - loss: 1.4513 - val_accuracy: 0.3924 - val_loss: 1.6718 - learning_rate: 4.0000e-04 Epoch 33/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.4275 - loss: 1.4480 - val_accuracy: 0.3682 - val_loss: 1.6634 - learning_rate: 4.0000e-04 Epoch 34/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4312 - loss: 1.4856 - val_accuracy: 0.3682 - val_loss: 1.7018 - learning_rate: 4.0000e-04 Epoch 35/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.4417 - loss: 1.4801 - val_accuracy: 0.3984 - val_loss: 1.6345 - learning_rate: 4.0000e-04 Epoch 36/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 48ms/step - accuracy: 0.4592 - loss: 1.4084 - val_accuracy: 0.4064 - val_loss: 1.6561 - learning_rate: 4.0000e-04 Epoch 37/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4588 - loss: 1.4434 - val_accuracy: 0.4004 - val_loss: 1.6434 - learning_rate: 4.0000e-04 Epoch 38/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4531 - loss: 1.3943 - val_accuracy: 0.3964 - val_loss: 1.6604 - learning_rate: 4.0000e-04 Epoch 39/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4582 - loss: 1.3680 - val_accuracy: 0.4004 - val_loss: 1.6628 - learning_rate: 4.0000e-04 Epoch 40/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5020 - loss: 1.3477 - val_accuracy: 0.4366 - val_loss: 1.6496 - learning_rate: 4.0000e-04 Epoch 41/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.4790 - loss: 1.3586 - val_accuracy: 0.4145 - val_loss: 1.6427 - learning_rate: 4.0000e-04 Epoch 42/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.4756 - loss: 1.3570 - val_accuracy: 0.4266 - val_loss: 1.6280 - learning_rate: 4.0000e-04 Epoch 43/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 49ms/step - accuracy: 0.5077 - loss: 1.2930 - val_accuracy: 0.4185 - val_loss: 1.6678 - learning_rate: 4.0000e-04 Epoch 44/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.4973 - loss: 1.3144 - val_accuracy: 0.4366 - val_loss: 1.6394 - learning_rate: 4.0000e-04 Epoch 45/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5019 - loss: 1.2759 - val_accuracy: 0.4064 - val_loss: 1.6664 - learning_rate: 4.0000e-04 Epoch 46/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 52ms/step - accuracy: 0.5279 - loss: 1.2461 - val_accuracy: 0.4346 - val_loss: 1.6333 - learning_rate: 4.0000e-04 Epoch 47/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5367 - loss: 1.2632 - val_accuracy: 0.4225 - val_loss: 1.6363 - learning_rate: 4.0000e-04 Epoch 48/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 51ms/step - accuracy: 0.5051 - loss: 1.2754 - val_accuracy: 0.4366 - val_loss: 1.6292 - learning_rate: 1.6000e-04 Epoch 49/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.5360 - loss: 1.2082 - val_accuracy: 0.4306 - val_loss: 1.6519 - learning_rate: 1.6000e-04 Epoch 50/50 24/24 ━━━━━━━━━━━━━━━━━━━━ 1s 50ms/step - accuracy: 0.5767 - loss: 1.1809 - val_accuracy: 0.4286 - val_loss: 1.6361 - learning_rate: 1.6000e-04
In [73]:
print("Accuracy of our model on test data : " , model.evaluate(x_test,y_test)[1]*100 , "%")
epochs = [i for i in range(50)]
fig , ax = plt.subplots(1,2)
train_acc = history.history['accuracy']
train_loss = history.history['loss']
test_acc = history.history['val_accuracy']
test_loss = history.history['val_loss']
fig.set_size_inches(20,6)
ax[0].plot(epochs , train_loss , label = 'Training Loss')
ax[0].plot(epochs , test_loss , label = 'Testing Loss')
ax[0].set_title('Training & Testing Loss')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[1].plot(epochs , train_acc , label = 'Training Accuracy')
ax[1].plot(epochs , test_acc , label = 'Testing Accuracy')
ax[1].set_title('Training & Testing Accuracy')
ax[1].legend()
ax[1].set_xlabel("Epochs")
plt.show()
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - accuracy: 0.4288 - loss: 1.6298 Accuracy of our model on test data : 42.85714328289032 %
In [74]:
pred_test = model.predict(x_test)
y_pred = encoder.inverse_transform(pred_test)
y_test = encoder.inverse_transform(y_test)
16/16 ━━━━━━━━━━━━━━━━━━━━ 1s 29ms/step
In [77]:
df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
df['Predicted Labels'] = y_pred.flatten()
df['Actual Labels'] = y_test.flatten()
df[5:15]
Out[77]:
| Predicted Labels | Actual Labels | |
|---|---|---|
| 5 | fear | fear |
| 6 | surprise | surprise |
| 7 | disgust | disgust |
| 8 | fear | fear |
| 9 | calm | calm |
| 10 | sad | surprise |
| 11 | disgust | sad |
| 12 | surprise | fear |
| 13 | sad | calm |
| 14 | calm | disgust |
In [78]:
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize = (12, 10))
cm = pd.DataFrame(cm , index = [i for i in encoder.categories_] , columns = [i for i in encoder.categories_])
sns.heatmap(cm, linecolor='white', cmap='Blues', linewidth=1, annot=True, fmt='')
plt.title('Confusion Matrix', size=20)
plt.xlabel('Predicted Labels', size=14)
plt.ylabel('Actual Labels', size=14)
plt.show()
In [79]:
print(classification_report(y_test, y_pred))
precision recall f1-score support
angry 0.60 0.48 0.54 66
calm 0.54 0.68 0.60 66
disgust 0.34 0.48 0.40 64
fear 0.49 0.40 0.44 72
happy 0.51 0.38 0.44 65
neutral 0.14 0.03 0.05 33
sad 0.23 0.15 0.18 67
surprise 0.36 0.62 0.45 64
accuracy 0.43 497
macro avg 0.40 0.41 0.39 497
weighted avg 0.42 0.43 0.41 497
In [ ]: